#####
library(modelsummary)
library(marginaleffects)
library(tidyverse)
library(data.table)
library(ggplot2)
library(ggthemes); library(ggExtra); library(broom)
library(fixest);library(ggeffects)
load("hh_data.RData")
source("helpers.R")
####
dir.create("figures")
dir.create("tables")
## average policy loss over time

factor <- 1.5

hh_data |> 
ggplot(aes(x=year)) +
  geom_smooth(aes(y=policy_loss_Factor), method = "gam", 
              formula = y ~ s(x, k = 12, bs = "cs"), col="blue") +
  geom_smooth(aes(y=proeu*factor), method = "gam", 
              formula = y ~ s(x, k = 12, bs = "cs"), col="red") +
  scale_y_continuous(name = "Policy loss \n (blue)", 
                     sec.axis = sec_axis(~ . / factor,
                                         name = "Support for the EU \n (red)")) +
  theme_minimal() +
  xlab("")
ggsave("figures/policy_and_eu_support.png",
       width = 6, height = 4, units = "in", dpi = 450)

res_Keywords <- models(hh_data, "policy_loss_Keywords")
res_CMPgen <- models(hh_data, "policy_loss_CMPgen")
res_CMPecon <- models(hh_data, "policy_loss_CMPecon")
res_CMPsocial <- models(hh_data, "policy_loss_CMPsocial")
res_LLM <- models(hh_data, "policy_loss_LLM")
res_Factor <- models(hh_data, "policy_loss_Factor")

res_Factor_cum <- models(hh_data,"policy_loss_Factor_cum")
res_Keywords_cum <- models(hh_data, "policy_loss_Keywords_cum")
res_CMPgen_cum <- models(hh_data, "policy_loss_CMPgen_cum")
res_CMPecon_cum <- models(hh_data, "policy_loss_CMPecon_cum")
res_CMPsocial_cum <- models(hh_data, "policy_loss_CMPsocial_cum")
res_LLM_cum <- models(hh_data, "policy_loss_LLM_cum")

results_table(res_Keywords, "Keywords", "Results: Policy-loss Keywords", "tab:keywords_res", "tables/keywords_res.tex")
results_table(res_CMPgen, "CMPgen", "Results: Policy-loss CMPgen", "tab:gen", "tables/gen_res.tex")
results_table(res_CMPecon, "CMPecon", "Results: Policy-loss CMPecon", "tab:econ", "tables/econ_res.tex")
results_table(res_CMPsocial, "CMPsocial", "Results: Policy-loss CMPsocial", "tab:social", "tables/social_res.tex")
results_table(res_LLM, "LLM", "Results: Policy-loss LLM", "tab:llm", "tables/llm_res.tex")

results_table(res_Factor, "Factor", "Results: Policy-loss Factor", "tab:factor", "tables/factor_res.tex")

results_table(res_Keywords_cum, "Keywords", "Results: Policy-loss Keywords", "tab:keywords_res", "tables/keywords_res_cum.tex")
results_table(res_CMPgen_cum, "CMPgen", "Results: Policy-loss CMPgen", "tab:gen", "tables/gen_res_cum.tex")
results_table(res_CMPecon_cum, "CMPecon", "Results: Policy-loss CMPecon", "tab:econ", "tables/econ_res_cum.tex")
results_table(res_CMPsocial_cum, "CMPsocial", "Results: Policy-loss CMPsocial", "tab:social", "tables/social_res_cum.tex")
results_table(res_LLM_cum, "LLM", "Results: Policy-loss LLM", "tab:llm", "tables/llm_res_cum.tex")

results_table(res_Factor_cum, "Factor", "Results: Policy-loss Factor (cummulative)", "tab:factor", "tables/factor_res_cum.tex")

##################
modelnames <- c("FE", "FE + left-right", "FE + left-right + controls") 

est <- bind_rows(
extract_treatment(res_Keywords, "policy_loss_Keywords","Keywords", modelnames),
extract_treatment(res_Keywords_cum, "policy_loss_Keywords_cum","Keywords", modelnames),
extract_treatment(res_CMPgen, "policy_loss_CMPgen", "CMPgen", modelnames),
extract_treatment(res_CMPgen_cum, "policy_loss_CMPgen_cum", "CMPgen", modelnames),
extract_treatment(res_CMPecon, "policy_loss_CMPecon", "CMPecon", modelnames),
extract_treatment(res_CMPecon_cum, "policy_loss_CMPecon_cum", "CMPecon", modelnames),
extract_treatment(res_CMPsocial, "policy_loss_CMPsocial", " CMPsocial", modelnames),
extract_treatment(res_CMPsocial_cum, "policy_loss_CMPsocial_cum", " CMPsocial", modelnames),
extract_treatment(res_LLM, "policy_loss_LLM", "  LLM", modelnames),
extract_treatment(res_LLM_cum, "policy_loss_LLM_cum", "  LLM", modelnames),
extract_treatment(res_Factor, "policy_loss_Factor", "  Factor", modelnames),
extract_treatment(res_Factor_cum, "policy_loss_Factor_cum", "  Factor", modelnames)
) 

est$`Policy loss` <- ifelse(grepl("_cum", est$term), "cummulative", "immediate")

est |> 
  ggplot(aes(y = modtype, x = estimate,group = `Policy loss`, col = `Policy loss`)) +
  geom_point(position = position_dodge(width = 0.4)) +
  geom_errorbar(aes(xmin = estimate - 1.96*std.error,
                    xmax = estimate + 1.96*std.error),
                width =.0,   position = position_dodge(width = 0.4)) +
  geom_vline(xintercept = 0, linetype = "dashed", col = "red") +
theme_minimal() +
  ylab("")+
  xlab("Coefficient of policy loss on probability of EU support")+
  facet_wrap(~modelnames, ncol = 3, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 10),
        axis.text.y = element_text(hjust = 1, size = 10),
        plot.margin=grid::unit(c(0,10,0,-1), "mm"),
        strip.text = element_text(size = 10)) +
  theme(legend.position="bottom")
  
ggsave("figures/overall_model.png", 
       width = 8, height = 3.25, units = "in", dpi = 450)

### magnitude effect
res_Factor$mod_3$coefficients[1]*max(hh_data$policy_loss_Factor)
res_Factor_cum$mod_3$coefficients[1]*max(hh_data$policy_loss_Factor_cum)

policy_loss_pr_overall <- predict_response(res_Factor$mod_3, terms = c("policy_loss_Factor [all]"),
                                      margin = "marginalmeans")
summary(policy_loss_pr_overall)

pr_sex <- predict_response(res_Factor$mod_3, terms = c("sex [all]"),
                           margin = "marginalmeans")
summary(pr_sex)

pr_education <- predict_response(res_Factor$mod_3, terms = c("education [all]"),
                                           margin = "marginalmeans")
summary(pr_education)

pr_age <- predict_response(res_Factor$mod_3, terms = c("age [all]"),
                                 margin = "marginalmeans")
summary(pr_age)

pr_age |> 
  filter(x == 70) |> 
  summary()

pr_age |> 
  filter(x == 20) |> 
  summary()

# ####################
# ## only factor plot
 bind_rows(
   extract_treatment(res_Factor, "policy_loss_Factor", modelnames)
 ) |> 
   ggplot(aes(y = modtype, x = estimate)) +
   geom_point() +
   geom_errorbar(aes(xmin = estimate - 1.96*std.error,
                     xmax = estimate + 1.96*std.error),
                 width =.0) +
   geom_vline(xintercept = 0, linetype = "dashed", col = "red") +
   theme_minimal() +
   xlab("Polity-loss (Factor-model)")+
   ylab("")+
   theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 16),
         axis.text.y = element_text(angle = 0, hjust = 1, size = 16)) 
# ggsave("figures/factor_overall_model.png", 
#        width = 6, height = 5, units = "in", dpi = 450)


## binning
hh_data$bin_loss_Factor <- bin(hh_data$policy_loss_Factor, "cut::.75]3]5]")
hh_data$bin_loss_Factor_cum <- bin(hh_data$policy_loss_Factor_cum, "cut::.75]3]5]")
bin_factor <- feols(proeu ~ bin_loss_Factor + education + left_right + sex + age + age_sq  | country_year, data = hh_data, vcov = "cluster")
bin_factor_edu <- feols(proeu ~ bin_loss_Factor * education + left_right + sex + age + age_sq  | country_year, data = hh_data, vcov = "cluster")
summary(bin_factor)
summary(bin_factor_edu)

texreg::texreg(list(bin_factor, bin_factor_edu),
               custom.coef.name = c("Policy loss [0.75; 2.99]",
                                    "Policy loss [3.01; 5.00]",
                                    "Policy loss [5.04; 7.55]",
                                    "Left - Right",
                                    "Univerity",
                                    "Female",
                                    "Age",
                                    "Age squared",
                                    "Univeristy * Policy loss [0.75; 2.99]",
                                    "Univeristy * Policy loss [3.01; 5.00]",
                                    "Univeristy * Policy loss [5.04; 7.55]"),
               caption = "Binned policy loss (immenent, cummulative)",
               label = "tab:binned_loss",
               include.rsquared = FALSE,
               include.adjrs = FALSE,
               include.proj.stats = FALSE, 
               include.deviance = FALSE,
               include.pseudors = FALSE,
               include.loglik = FALSE,
               custom.gof.rows = list("Cluster st.errors" = c("Country-year", "Country-year")),
               custom.note = "Robust standard errors in parentences.",
               sideways =TRUE,
               scalebox = .8,
               use.packages = FALSE,dcolum = TRUE,
               digits = 3,
               stars = c(0.001, 0.01, 0.05),
               file = "tables/binned_loss.tex") 

############################
### Education, factor model 
res_Factor$mod_4 <- feols(proeu ~ policy_loss_Factor + left_right + education + sex + age + age_sq +
                            policy_loss_Factor:education | country_year, vcov = "cluster", data = hh_data)
policy_loss_preds <- predict_response(res_Factor$mod_4, terms = c("policy_loss_Factor [all]", "education [all]"),
                                      margin = "marginalmeans")

 policy_loss_preds |> 
  plot(n_rows = 1, colors = c("black", "blue"), show_title = FALSE, show_x_title = FALSE, 
       show_y_title = FALSE, use_theme = FALSE) +
  theme_minimal()+
  theme(axis.text.x = element_text( hjust = 1, size = 10))+
  theme(axis.text.y = element_text(hjust = 1, size = 10))+
  theme(axis.title=element_text(size=12))+
  theme(plot.margin=grid::unit(c(0,0,0,5), "mm")) +
  xlab("Policy-loss (Factor Immediate)")+
  ylab("Probablity of supporting the EU")+
xlim(0,5.2) +
   geom_jitter() +
   geom_rug(sides="b", alpha = 1/2, position = "jitter") 

ggsave("figures/peds_edu_factor.png", 
       width = 6, height = 2.75, units = "in", dpi = 450)

## Factor_cum
# res_Factor_cum$mod_4 <- feols(proeu ~ policy_loss_Factor_cum + left_right + education + sex + age + age_sq +
#                             policy_loss_Factor_cum:education | country_year, vcov = "cluster", data = hh_data)
# policy_loss_cum_preds <- predict_response(res_Factor_cum$mod_4, terms = c("policy_loss_Factor_cum [all]", "education [all]"),
#                                           margin = "marginalmeans")
# 
# policy_loss_cum_preds |> 
#   plot(n_rows = 1, colors = c("black", "blue"), show_title = FALSE, show_x_title = FALSE, show_y_title = FALSE, use_theme = FALSE) +
#   theme_minimal()+
#   theme(axis.text.x = element_text( hjust = 1, size = 10))+
#   theme(axis.text.y = element_text(hjust = 1, size = 10))+
#   theme(axis.title=element_text(size=12))+
#   xlab("Policy-loss (Factor Cummulative)")+
#   ylab("Probablity of supporting the EU")+
#   xlim(0,5.2) +
#   geom_jitter() +
#   geom_rug(sides="b", alpha = 1/2, position = "jitter")
# 
# 
# ggsave("figures/peds_edu_factor_cum.png", 
#                         width = 6, height = 2.75, units = "in", dpi = 450)
###### identity models
id_data <- hh_data |> 
  filter(!is.na(identity))
mod_identity <- list()
mod_identity_cum <- list()
mod_identity$mod_1 <- feols(identity ~ policy_loss_Factor | country_year, data = id_data,
                      vcov = "cluster") 
mod_identity$mod_2 <- feols(identity ~ policy_loss_Factor + left_right| country_year, data = id_data,
                      vcov = "cluster") 
mod_identity$mod_3 <- feols(identity ~ policy_loss_Factor + left_right + education + sex + age + age_sq| country_year, data = id_data,
                            vcov = "cluster") 
mod_identity_cum$mod_1 <- feols(identity ~ policy_loss_Factor_cum | country_year, data = id_data,
                            vcov = "cluster") 
mod_identity_cum$mod_2 <- feols(identity ~ policy_loss_Factor_cum + left_right| country_year, data = id_data,
                            vcov = "cluster") 
mod_identity_cum$mod_3 <- feols(identity ~ policy_loss_Factor_cum + left_right + education + sex + age + age_sq| country_year, data = id_data,
                            vcov = "cluster") 


modelnames <- c("FE", "FE + left-right", "FE + left-right + controls") 

est <- bind_rows(
  extract_treatment(mod_identity, "policy_loss_Factor", "  Factor", modelnames),
  extract_treatment(mod_identity_cum, "policy_loss_Factor_cum", "  Factor", modelnames)
) 

est$`Policy loss` <- ifelse(grepl("_cum", est$term), "cummulative", "immediate")

est |> 
  ggplot(aes(y = modtype, x = estimate,group = `Policy loss`, col = `Policy loss`)) +
  geom_point(position = position_dodge(width = 0.4)) +
  geom_errorbar(aes(xmin = estimate - 1.96*std.error,
                    xmax = estimate + 1.96*std.error),
                width =.0,   position = position_dodge(width = 0.4)) +
  geom_vline(xintercept = 0, linetype = "dashed", col = "red") +
  theme_minimal() +
  ylab("")+
  xlab("Coefficient of policy loss on probability of identifying as European")+
  facet_wrap(~modelnames, ncol = 3, scales = "free_x") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, size = 10),
        axis.text.y = element_text(hjust = 1, size = 10),
        plot.margin=grid::unit(c(0,0,0,5), "mm"),
        strip.text = element_text(size = 10))+
  theme(legend.position="bottom")

ggsave("figures/peds_identity.png", 
       width = 6, height = 2.5, units = "in", dpi = 450)

pr_identity <- predict_response(mod_identity$mod_3, terms = c("policy_loss_Factor [all]"),
                                          margin = "marginalmeans")
pr_identity_cum <- predict_response(mod_identity_cum$mod_3, terms = c("policy_loss_Factor_cum [all]"),
                                margin = "marginalmeans")
summary(pr_identity)
summary(pr_identity_cum)

## Country-models
country_factor <- country_models(hh_data, "policy_loss_Factor")

country_factor |> 
  rename(countries = modtype) |> 
  arrange(estimate) |>      # First sort by val. This sort the dataframe but NOT the factor levels
  mutate(countries=factor(countries, levels=countries)) |> # Then ensure that the factor levels are corrected
  ggplot(aes(x = countries, y = as.numeric(estimate))) +
  geom_point() +
  geom_errorbar(aes(ymin = estimate - 1.96*std.error, 
                    ymax = estimate + 1.96*std.error), 
                width = 0) +
  ylab("") +
  xlab("") +
  theme_minimal() +
  geom_hline(yintercept = 0, col = "red", linetype = "dashed")+
  theme(axis.text.x = element_text(angle = 45, hjust = 1, , vjust = 1, size = 8)) +
  theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 8)) +
  theme(plot.margin=grid::unit(c(0,0,0,5), "mm"))
ggsave("figures/country_models_factor.png",
       width = 6, height = 3, units = "in", dpi = 450)


###### annual models
res_old <- annual_models(hh_data, "policy_loss_Factor", old = 1)
new_data <- hh_data |> 
  filter(year>2003)
res_new <- annual_models(new_data, "policy_loss_Factor", old = 0)

res_old$`Member state` <- "Old 15"
res_new$`Member state` <- "New 13"

year_eff <- rbind(res_old, res_new)

year_eff |> 
  ggplot(aes(x = modtype, y = estimate, color = `Member state`, group = `Member state`)) +
  geom_smooth()+
  geom_point(position = position_dodge(width = .3)) +
  geom_errorbar(aes(ymin = estimate - 1.96*std.error, 
                    ymax = estimate + 1.96*std.error), 
                width = 0, position = position_dodge(width = .3)) +
  ylab("Policy loss (Factor)") +
  xlab("") +
  theme_minimal() +
  geom_hline(yintercept = 0, col = "red", linetype = "dashed")+
  theme(axis.text.x=element_text(size = 8),
        axis.text.y = element_text(size = 8),
        axis.title=element_text(size=10)) +
  theme(plot.margin=grid::unit(c(5,0,0,3), "mm"))
ggsave("figures/annual_models_factor.png",
       width = 6, height = 2, units = "in", dpi = 450)




###############
# scale and location
#########################################
### bias
bias_scale <- function(data){
  scale_val <- seq(1, 3, length = 250) #no adjustment to identical scale
  res_bias <- matrix(NA, ncol = 3, nrow = length(scale_val))
  for (i in seq(scale_val)){
    temp_data <- data
    temp_data$loss_sq <- I((temp_data$Factor - scale(temp_data$left_right_orig, scale = scale_val[i]))^2)
    
    tt <- feols(proeu ~ loss_sq |  country_year, 
                data = temp_data, vcov = "cluster",  weights = hh_data$weights)
    res_bias[i,] <- c(scale_val[i], coefficients(tt), se(tt))
  }
  colnames(res_bias) <- c("bias", "coef", "se")
  res_bias
}
scale_bias <- bias_scale(hh_data)

bias_location <- function(data){
  bias_val <- seq(-1, 1, length = 250) # maximum bias in location
  res_bias_location <- matrix(NA, ncol = 3, nrow = length(bias_val))
  for (i in seq(bias_val)){
    temp_data <- data
    temp_data$loss_sq <- I((temp_data$Factor - (scale(temp_data$left_right_orig, scale = 2) + bias_val[i]))^2)
    tt <- feols(proeu ~ loss_sq |  country_year, 
                data = temp_data, vcov = "cluster", weights = hh_data$weights)
    res_bias_location[i,] <- c(bias_val[i], coefficients(tt), se(tt))
  }
  colnames(res_bias_location) <- c("bias", "coef", "se")
  res_bias_location
}

location_bias <- bias_location(hh_data)

scale_bias |>
  ggplot(aes(y = coef, x = bias)) +
  geom_point(size = .5) +
  geom_errorbar(aes(ymin = coef - 1.96*se, 
                    ymax = coef + 1.96*se), 
                width = 0.0005, col = "lightblue") +
  xlab("Bias in scale") +
  ylab("") +
  theme_minimal()+
  theme(axis.text.x = element_text(angle = 0, hjust = 1, size = 10),
        axis.title.x = element_text(size = 8),
        axis.text.y = element_text(hjust = 1, size = 10),
        plot.margin=grid::unit(c(4,0,4,0), "mm")) +
  geom_hline(yintercept = 0, col = "red", linetype = "dashed")
ggsave("figures/bias_scale.png",
       width = 3, height = 2, units = "in", dpi = 450)

location_bias |> 
  ggplot(aes(y = coef, x = bias)) +
  geom_point(size = .1) +
  geom_errorbar(aes(ymin = coef - 1.96*se, 
                    ymax = coef + 1.96*se), 
                width = 0.0005, col = "lightblue") +
  xlab("Bias in location") +
  ylab("") +
  theme_minimal()+
  theme(axis.text.x = element_text(angle = 0, hjust = 1, size = 10),
        axis.title.x = element_text(size = 8),
        axis.text.y = element_text(hjust = 1, size = 10),
        plot.margin=grid::unit(c(4,0,4,0), "mm"))+
  geom_hline(yintercept = 0, col = "red", linetype = "dashed")
ggsave("figures/bias_location.png",
       width = 3, height = 2, units = "in", dpi = 450)
########

####################
## Alternative modelling: logit
logit_Factor <- logit_models(hh_data, "policy_loss_Factor")
logit_Factor_cum <- logit_models(hh_data, "policy_loss_Factor_cum")

results_table(logit_Factor, "Factor", "Results: Policy-loss Factor", "tab:factor_logit", "tables/factor_logit.tex")

results_table(logit_Factor_cum, "Factor", "Results: Policy-loss Factor (cummulative)", "tab:factor_logit_cum", "tables/factor_logit_cum.tex")

##### absolute policy loss
res_Factor_abs <- models(hh_data, "policy_loss_Factor_abs")
res_Factor_abs_cum <- logit_models(hh_data, "policy_loss_Factor_cum")

results_table(res_Factor_abs, "Factor", "Results: Policy-loss Factor", "tab:factor_abs", "tables/factor_abs.tex")

results_table(res_Factor_abs_cum, "Factor", "Results: Policy-loss Factor (cummulative)", "tab:factor_abs_cum", "tables/factor_abs_cum.tex")